import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
init_notebook_mode()
%matplotlib inline
from IPython.display import HTML
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
df = pd.read_csv("cleaned.csv")
df0 = df
tran = pd.read_csv("train_2016_v2.csv")
#clean up the transaction dates by leaving only the month for easier graphics
yearmonth = []
for i in range(0,len(tran)):
yearmonth.append(tran["transactiondate"][i][0:7])
tran["yearmonth"] = yearmonth
tran =tran.sort_values(by=['yearmonth'])
transaction = pd.merge(tran,df, on='parcelid', how='inner')
transaction = transaction.sort_values(by=['yearmonth'])
#transaction.groupby('fips').count()["parcelid"]
#fips = county code
#6037 = los angeles
#6059 = Orange county
#6111 = Ventura County
jan = transaction[transaction["yearmonth"] == '2016-01'].groupby('fips').count()["parcelid"].values.tolist()
feb = transaction[transaction["yearmonth"] == '2016-02'].groupby('fips').count()["parcelid"].values.tolist()
march = transaction[transaction["yearmonth"] == '2016-03'].groupby('fips').count()["parcelid"].values.tolist()
april = transaction[transaction["yearmonth"] == '2016-04'].groupby('fips').count()["parcelid"].values.tolist()
may = transaction[transaction["yearmonth"] == '2016-05'].groupby('fips').count()["parcelid"].values.tolist()
june = transaction[transaction["yearmonth"] == '2016-06'].groupby('fips').count()["parcelid"].values.tolist()
july = transaction[transaction["yearmonth"] == '2016-07'].groupby('fips').count()["parcelid"].values.tolist()
aug = transaction[transaction["yearmonth"] == '2016-08'].groupby('fips').count()["parcelid"].values.tolist()
sep = transaction[transaction["yearmonth"] == '2016-09'].groupby('fips').count()["parcelid"].values.tolist()
octo = transaction[transaction["yearmonth"] == '2016-10'].groupby('fips').count()["parcelid"].values.tolist()
nov = transaction[transaction["yearmonth"] == '2016-11'].groupby('fips').count()["parcelid"].values.tolist()
dec = transaction[transaction["yearmonth"] == '2016-12'].groupby('fips').count()["parcelid"].values.tolist()
pcts = [jan,feb,march,april, may,june, july,octo, sep, nov, dec]
l1= []
l2 = []
l3 = []
for i in pcts:
l1.append(i[0])
l2.append(i[1])
l3.append(i[2])
import plotly
import plotly.graph_objs as go
x = ["January", "February", "March","April","May", "June","July","August", "September", "October","November","December"]
trace1 = go.Bar(
x=x,
y=l1,
name='Los Angeles'
)
trace2 = go.Bar(
x=x,
y=l2,
name='Orange County'
)
trace3 = go.Bar(
x=x,
y=l3,
name='Ventura County'
)
data = [trace1, trace2,trace3]
layout = go.Layout(
barmode='stack',
title = "Number of Transactions by Month"
)
fig = go.Figure(data=data, layout=layout)
plotly.offline.iplot(fig, filename='stacked-bar')
convert_type={31.0: 'Commercial', 46.0: 'MultiStory Store', 47.0: 'Store/Office',
246.0: 'Duplex',247.0: 'Triplex',248.0:'Quadruplex', 260.0: 'Residential General',
261.0: 'Single Family Residential',262.0: 'Rural Residence',263.0: 'Mobile Home',
264.0: 'Townhouse', 265.0: 'Cluster Home', 266.0: 'Condominium',267.0: 'Cooperative',
268.0: 'Row House',269.0: 'Planned Unit Development', 270.0: 'Residential Common Area',
271.0: 'Timeshare', 273.0: 'Bungalow', 274.0: 'Zero Lot Line', 275.0: 'Manufactured/Modular Homes',
276.0: 'Patio Home', 279.0: 'Inferred Single Family', 290.0: 'Vacant Land',291.0: 'Vacant Land' }
transaction['propertylandusetypeid']=transaction['propertylandusetypeid'].map(convert_type)
LA = transaction[transaction["fips"] ==6037]["propertylandusetypeid"].value_counts()#6037 = los angeles
orange = transaction[transaction["fips"] ==6059]["propertylandusetypeid"].value_counts()#6037 = los angeles
ventura = transaction[transaction["fips"] ==6111]["propertylandusetypeid"].value_counts() #6111 = Ventura County
import plotly
import plotly.graph_objs as go
trace1 = go.Bar(
x=LA.index[0:3],
y=LA.tolist()[0:3],
name='Los Angeles',
)
trace2 = go.Bar(
x=orange.index[0:2],
y=orange.tolist()[0:2],
name='Orange County'
)
trace3 = go.Bar(
x=ventura.index[0:3],
y=ventura.tolist()[0:3],
name='Ventura County'
)
data = [trace1, trace2, trace3]
layout = go.Layout(
barmode='group'
)
fig = go.Figure(data=data, layout=layout)
plotly.offline.iplot(fig, filename='grouped-bar')
transaction["latitude"] = transaction["latitude"]/1000000
transaction["longitude"] = transaction["longitude"]/1000000
np.random.seed(11452)
train = np.random.rand(len(transaction)) <0.5
trans_train = transaction[train]
#len(trans_train)
import plotly
from plotly.plotly import iplot
import plotly.graph_objs as go
mapbox_access_token = 'pk.eyJ1IjoiYmlwYXJ0aXRlaHlwZXJjdWJlIiwiYSI6ImNqczFjZzUydjF0MGc0OW1sYWIwYW5lY2UifQ.l6OVeSa3poXp6S4s8km8kA'
map_style = "mapbox://styles/bipartitehypercube/cjs26r8sw1cy31fqjhopqigtj"
scl = [0,"rgb(0,0,0)"],[0.25,"rgb(0, 0, 128)"],[0.5,"rgb(0, 25, 255)"],\
[0.75,"rgb(0, 0, 205)"],[1,"rgb(250, 250, 250)"]
data = [go.Scattermapbox(
lat=trans_train['latitude'],
lon=trans_train['longitude'],
text = trans_train['taxvaluedollarcnt'].astype(str) + ' Dollars',
mode='markers',
marker = dict(color = (trans_train['taxvaluedollarcnt']),colorscale = scl,reversescale = True,opacity = 0.5,size = 5,
colorbar = dict(thickness = 10,titleside = "right",
outlinecolor = "rgba(68, 68, 68, 0)",
ticks = "outside",ticklen = 3,
showticksuffix = "last",
ticksuffix = " Dollars",dtick = 1000000)))]
layout = go.Layout(
autosize=True,
hovermode='closest',
mapbox=dict(
accesstoken=mapbox_access_token,
bearing=0,
center=dict(lat=34,lon=-118.5),
style= map_style,
pitch=0,
zoom=7,),
title = 'Transaction location with Property Price',
)
fig = dict(data=data, layout=layout)
plotly.offline.iplot(fig, filename='Geolocation of properties')
import plotly
from plotly.plotly import iplot
import plotly.graph_objs as go
mapbox_access_token = 'pk.eyJ1IjoiYmlwYXJ0aXRlaHlwZXJjdWJlIiwiYSI6ImNqczFjZzUydjF0MGc0OW1sYWIwYW5lY2UifQ.l6OVeSa3poXp6S4s8km8kA'
map_style = "mapbox://styles/bipartitehypercube/cjs26r8sw1cy31fqjhopqigtj"
scl = [0,"rgb(0,0,0)"],[0.25,"rgb(0, 0, 128)"],[0.5,"rgb(0, 25, 255)"],\
[0.75,"rgb(0, 0, 205)"],[1,"rgb(250, 250, 250)"]
data = [go.Scattermapbox(
lat=trans_train['latitude'],
lon=trans_train['longitude'],
text = trans_train["calculatedfinishedsquarefeet"].astype(str) + ' Square Feet',
mode='markers',
marker = dict(color = (trans_train["calculatedfinishedsquarefeet"]),colorscale = scl,reversescale = True,opacity = 0.7,size = 5,
colorbar = dict(thickness = 10,titleside = "right",
outlinecolor = "rgba(68, 68, 68, 0)",
ticks = "outside",ticklen = 3,
showticksuffix = "last",
ticksuffix = " Square Feet",dtick = 700)))]
layout = go.Layout(
autosize=True,
hovermode='closest',
mapbox=dict(
accesstoken=mapbox_access_token,
bearing=0,
center=dict(lat=34,lon=-118.5),
style= map_style,
pitch=0,
zoom=7,),
title = 'Transaction location with Property Size',
)
fig = dict(data=data, layout=layout)
plotly.offline.iplot(fig, filename='Geolocation of properties')
import plotly
from plotly.plotly import iplot
import plotly.graph_objs as go
mapbox_access_token = 'pk.eyJ1IjoiYmlwYXJ0aXRlaHlwZXJjdWJlIiwiYSI6ImNqczFjZzUydjF0MGc0OW1sYWIwYW5lY2UifQ.l6OVeSa3poXp6S4s8km8kA'
map_style = "mapbox://styles/bipartitehypercube/cjs26r8sw1cy31fqjhopqigtj"
scl = [0,"rgb(0,0,0)"],[0.25,"rgb(0, 0, 128)"],[0.5,"rgb(0, 25, 255)"],\
[0.75,"rgb(0, 0, 205)"],[1,"rgb(250, 250, 250)"]
data = [go.Scattermapbox(
lat=trans_train['latitude'],
lon=trans_train['longitude'],
text = (trans_train['taxvaluedollarcnt']/trans_train["calculatedfinishedsquarefeet"]).astype(str) + ' Dollars Per Square Feet',
mode='markers',
marker = dict(color = (trans_train['taxvaluedollarcnt']/trans_train["calculatedfinishedsquarefeet"]),colorscale = scl,reversescale = True,opacity = 0.7,size = 5,
colorbar = dict(thickness = 10,titleside = "right",
outlinecolor = "rgba(68, 68, 68, 0)",
ticks = "outside",ticklen = 3,
showticksuffix = "last",
ticksuffix = " Dollars Per Square Feet",dtick = 700)))]
layout = go.Layout(
autosize=True,
hovermode='closest',
mapbox=dict(
accesstoken=mapbox_access_token,
bearing=0,
center=dict(lat=34,lon=-118.5),
style= map_style,
pitch=0,
zoom=7,),
title = 'Transaction location',
)
fig = dict(data=data, layout=layout)
plotly.offline.iplot(fig, filename='Geolocation of properties')
freq = df["yearbuilt"].value_counts()
freq = freq.values
freq = freq.tolist()
years = df["yearbuilt"].unique()
years = years.tolist()
years1 = transaction["yearbuilt"].unique()
years1 = years1.tolist()
freq1 = transaction["yearbuilt"].value_counts()
freq1 = freq1.values
freq1 = freq1.tolist()
ly0 = []
for i in years:
ly0.append(int(i))
ly1 = []
for i in years1:
ly1.append(int(i))
overall = pd.DataFrame({'yearbuilt': ly0,'Marketprop': freq})
transact = pd.DataFrame({'yearbuilt': ly1,'Transactionofprop': freq1})
comparison = pd.merge(overall,transact, on = 'yearbuilt', how = 'outer')
comparison = comparison.fillna(0)
#comparison['yearbuilt'] = comparison['yearbuilt'].apply(str)
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler()
column_names_to_normalize = ['Marketprop','Transactionofprop']
x = comparison[column_names_to_normalize].values
x_scaled = min_max_scaler.fit_transform(x)
df_temp = pd.DataFrame(x_scaled, columns=column_names_to_normalize, index = comparison.index)
comparison[column_names_to_normalize] = df_temp
#comparison[comparison["Transactionofprop"] > 0.8]
#comparison[comparison["Marketprop"] > 0.8]
The left plot (blue line) indicates that the overal supply by the age of the house in the market.
The left plot (orange line) indicates the demand of properties based on the year built (age).
import matplotlib.pyplot as plt
from matplotlib import pylab
import datetime
import numpy as np
import seaborn as sns; sns.set()
palette = dict(zip([1,2],
sns.color_palette("nipy_spectral_r", 4)))
fig, (ax1, ax2)= plt.subplots(ncols = 2,figsize=(16, 5))
ax1 = sns.lineplot(comparison['yearbuilt'] ,comparison["Marketprop"], data = comparison, palette = palette,ax=ax1, label = "Property Supply")
ax1 = sns.lineplot(comparison['yearbuilt'],comparison["Transactionofprop"], data = comparison, palette = palette,ax=ax1, label = "Property Demand")
ax1.set(xlabel ='Year Built')
ax1.set(ylabel = 'Normalized Number of Properties')
ax1.set_title('Properties by Year Built')
ax1.set_xlim(1880,2020)
leg =ax1.legend()
ax2 = sns.lineplot(transaction['yearbuilt'],abs(transaction["logerror"]), data = transaction, ax = ax2, label = "Log Error")
ax2.set(xlabel ='Year Built')
ax2.set(ylabel = 'Absolute Logerror')
ax2.set_title("Logerror Fluctuations by Year Built")
ax1.set_xlim(1880,2020)
plt.show()